\documentclass{ctexart}

\usepackage{amsmath}
\usepackage{amssymb}

\title{数据科学的数学基础\\第三次课堂作业}
\author{邵柯欣 \\ 学号：3200103310}

\begin{document}
\maketitle
\section{$2.1$}
$f$ is a pdf. $E[X] = 3, Var[X] = 10, n = 10, \bar{X} = \frac{1}{10}\sum X_i$.
\subsection{What is $E[\bar{X}]$?}
$E[\bar{X}] = E[\frac{1}{n} \sum X_i] = E[X] = 3.$
\subsection{What is $Var[\bar{X}]$?}
$Var[\bar{X}] = \frac{1}{n^2}Var[\sum X_i] = \frac{1}{n} Var[X] = 1.$
\subsection{What is the standard deviation of $\bar{X}$?}
The standard deviation of $\bar{X} = \sqrt{Var[\bar{X}]} = 1$.

\subsection{Which is larger, $Pr[X > 4]$ or $Pr[\bar{X} > 4]$?}
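$Pr[X > 4] > Pr[\bar{X} > 4]$: because $Var[\bar{X}] = 1 < Var[X] = 10$, $\bar{X}$ is more concentrated around the mean $3$, so it is less likely to exceed $4$.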

\subsection{Which is larger, $Pr[X > 2]$ or $Pr[\bar{X} > 2]$?}
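$Pr[X > 2] < Pr[\bar{X} > 2]$: because $Var[\bar{X}] = 1 < Var[X] = 10$, $\bar{X}$ is more concentrated around the mean $3$, so it is more likely to exceed $2$.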

\section{$2.2$}
已知$E[X] = 0$, \\
suppose $Y = X+1$, then $E[Y] = E[X]+1 = 1$.\\
hence $Pr[X > 1.5] = Pr[Y > 2.5] \le \frac{E[Y]}{2.5} = 0.4$.
\section{$2.4$}
$E[X] = 5, Var[X] = 100, n = 16, \bar{X} = \frac{1}{16}\sum X_i$.
\subsection{What is $E[\bar{X}]$?}
$E[\bar{X}] = E[\frac{1}{n} \sum X_i] = E[X] = 5$.

\subsection{What is $Var[\bar{X}]$?}
$Var[\bar{X}] = \frac{1}{n^2}Var[\sum X_i] = \frac{1}{n} Var[X] = 6.25$.

\subsection{Use the Markov inequality to upper-bound $Pr[\bar{X} > 8]$.}
$Pr[\bar{X} > 8] \le \frac{E[\bar{X}]}{8} = 0.625$.

\subsection{Use the Chebyshev inequality to upper-bound $Pr[\bar{X} > 8]$.}
$Pr[\bar{X} > 8] \le Pr[|\bar{X} - E[\bar{X}]| \ge 3] \le \frac{Var[\bar{X}]}{3^2} \approx 0.6944$.

\subsection{Use the Chernoff-Hoeffding inequality to upper-bound $Pr[\bar{X} > 8]$.}
$Pr[\bar{X} > 8] \le Pr[|\bar{X} - E[\bar{X}]| \ge 3] \le 2*e^{\frac{-2*3^2*16}{20^2}} = 2*e^{-\frac{18}{25}} \approx 0.9735$.

\subsection{If we increase $n$ to 100, how will the above three bounds be affected?}
当$n = 100$,\\
Markov inequality 不变;\\
Chebyshev inequality 减小;\\
Chernoff-Hoeffding inequality 减小。

\section{$2.5$}
\subsection{Use Chebyshev's inequality to determine a value n so that $Pr[|\bar{X} - \mu| \ge 1] \le 0.5$.}
$Pr[|\bar{X} - \mu| \ge 1] \le \frac{Var[\bar{X}]}{1^2}$.\\
所以，$Var[\bar{X}] = \frac{64}{n} \le 0.5$ i.e. $n \ge 128$。

\subsection{Use Chebyshev's inequality to determine a value n so that $Pr[|\bar{X} - \mu| \ge 0.1] \le 0.1$.}
$Pr[|\bar{X} - \mu| \ge 0.1] \le \frac{Var[\bar{X}]}{0.1^2}$.\\
所以，$\frac{Var[\bar{X}]}{0.1^2} = \frac{64}{0.1^2*n} \le 0.1$ i.e. $n \ge 64000$。

\subsection{Use the Chernoff-Hoeffding bound to determine a value n so that $Pr[|\bar{X} - \mu| \ge 1] \le 0.5$.}
$Pr[|\bar{X} - \mu| \ge 1] \le 2*e^{\frac{-2*1^2*n}{20^2}}$.\\
所以，$2*e^{\frac{-2*1^2*n}{20^2}} \le 0.5$ i.e. $n \ge 400*\ln(2)$。

\subsection{Use the Chernoff-Hoeffding bound to determine a value n so that $Pr[|\bar{X} - \mu| \ge 0.1] \le 0.1$.}
$Pr[|\bar{X} - \mu| \ge 0.1] \le 2*e^{\frac{-2*0.1^2*n}{20^2}}$.\\
所以，$2*e^{\frac{-2*0.1^2*n}{20^2}} \le 0.1$ i.e. $n \ge 20000*\ln(20)$。

\section{$2.6$}
$E[C] = 3, Var[C] = 1, E[T] = 2, Var[T] = 5.$
\subsection{Use Markov's inequality to upper-bound the probability that I buy 4 or more coffees, and the same for teas: $Pr[C \ge 4]$ and $Pr[T \ge 4]$.}
$Pr[C \ge 4] \le \frac{E[C]}{4} = 0.75$;\\
$Pr[T \ge 4] \le \frac{E[T]}{4} = 0.5$.

\subsection{Use Chebyshev's inequality to upper-bound the probability that I buy 4 or more coffees, and the same for teas: $Pr[C \ge 4]$ and $Pr[T \ge 4]$.}
$Pr[C \ge 4] \le Pr[|C - E[C]| \ge 1] \le \frac{Var[C]}{1^2} = 1$;\\
$Pr[T \ge 4] \le Pr[|T - E[T]| \ge 2] \le \frac{Var[T]}{2^2} = 1.25$ i.e. $Pr[T \ge 4] \le 1$.

\section{$2.7$}
scores in $[0, 100]$, $E[X] = 82$, standard deviation is $4$, $Var[X] = 16$.
\subsection{Using Chebyshev's inequality, what percentage of the tests have a grade of at least 70 and at most 94 ?}
$X \in [70, 94]$, $|X - E[X]| \le 12$\\
$Pr[|X - E[X]| \le 12] \ge 1 - \frac{Var[X]}{12^2} = \frac{8}{9}$.

\subsection{Using Markov's inequality, what is the highest percentage of tests which could have a score less than 60 ?}
$Pr[X < 60] = Pr[(100-X) > 40] \le \frac{100-E[X]}{40} = 0.45$.

\end{document}